Source of the judge data: the judge listings on www.bvger.ch, scraped below.
In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
plt.style.use('ggplot')
import dateutil.parser
import re
import time
In [2]:
# Download the first judge overview page (the "IV" page) and collect the
# per-judge <div> blocks that link to the individual detail pages.
url = "http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de"
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'  # force UTF-8 when decoding response.text
judges_IV_soup = BeautifulSoup(response.text, 'html.parser')
judges_IV = judges_IV_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})
In [3]:
# Matches a parenthesised remark inside the link text, e.g. "(...)".
_PARENTHESISED_RE = re.compile(r'\(.+\)')

# Ordered (needle, replacement) steps that turn the last three characters of
# a judge's detail page into a party label. The steps are applied in
# sequence, so their order must be kept. Steps of the former replace chain
# that could never change the result (an exact repeat of the UDC/DC. steps,
# a second 'DC.' replacement after all 'DC.' were already gone, and an
# 'üne' -> 'GrGrüne' -> 'Grüne' round trip) have been dropped.
_PARTY_SUFFIX_STEPS = (
    ('los', 'parteilos'),
    ('ux.', 'FDP'),
    ('PLR', 'FDP'),
    ('que', 'parteilos'),
    ('üne', 'Grüne'),
    ('UDC', 'SVP'),
    ('DC.', 'SVP'),
    ('rts', 'Grüne'),
    ('07.', 'parteilos'),
    ('ale', 'GLP'),
    ('PS', 'SP'),
    ('VP.', 'SVP'),
)


def _party_from_suffix(suffix):
    """Apply the ordered suffix replacements and strip surrounding whitespace."""
    for needle, label in _PARTY_SUFFIX_STEPS:
        suffix = suffix.replace(needle, label)
    return suffix.strip()


def extract_judges(html_elements, timeout=30):
    """Process a list of html elements containing data about the judges.

    For every element, the first ``<a>`` tag supplies the judge's name (a
    parenthesised remark is removed) and the link to the judge's detail page.
    The detail page is downloaded and the last three characters of its
    ``webText flexTinymceDiv`` block are mapped to a party label.

    Args:
        html_elements: Iterable of BeautifulSoup elements, each containing an
            ``<a>`` tag whose ``href`` is appended to ``http://www.bvger.ch``.
        timeout: Seconds to wait for each detail-page request.

    Returns:
        A list with one dict per element, holding the keys ``'Name'`` and
        ``'Partei'``. Empty input yields an empty list without any request.

    Raises:
        requests.HTTPError: If a detail page answers with an error status.
        AttributeError: If an element has no ``<a>`` tag or a detail page
            lacks the expected text block (``find`` returns ``None`` then).
    """
    judges = []
    for element in html_elements:
        link = element.find('a')  # one lookup serves both name and href

        # name
        name = _PARENTHESISED_RE.sub('', link.text).strip()

        # url of the detail page
        detail_url = 'http://www.bvger.ch' + link.get('href')
        response = requests.get(detail_url, timeout=timeout)
        response.raise_for_status()
        response.encoding = 'utf-8'
        detail_page = BeautifulSoup(response.text, 'html.parser')

        # Only the trailing three characters of the text block are used to
        # recognise the party.
        text_block = detail_page.find('div', {'class': 'webText flexTinymceDiv'})
        partei = _party_from_suffix(text_block.text[-3:])

        judges.append({'Name': name, 'Partei': partei})
    return judges
In [4]:
# Scrape name and party for every entry found on the "IV" overview page.
# extract_judges issues one HTTP request per entry.
judges_IV_list = extract_judges(judges_IV)
In [5]:
# Download the second judge overview page (the "V" page) and collect the
# per-judge <div> blocks that link to the individual detail pages.
url = "http://www.bvger.ch/gericht/richter/00563/00581/index.html?lang=de"
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'  # force UTF-8 when decoding response.text
judges_V_soup = BeautifulSoup(response.text, 'html.parser')
judges_V = judges_V_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})
In [6]:
# Scrape name and party for every entry found on the "V" overview page.
# extract_judges issues one HTTP request per entry.
judges_V_list = extract_judges(judges_V)
In [7]:
# Combine both per-page result lists into one list of dicts (IV entries first).
judges_list = judges_IV_list + judges_V_list
In [8]:
# Turn the list of {'Name', 'Partei'} dicts into a DataFrame. Note that the
# name judges_list is rebound from a list to a DataFrame here.
judges_list = pd.DataFrame(judges_list)
In [9]:
def _split_judge_name(full_name):
    """Return ``(first_name, last_name)`` for a space-separated judge name."""
    tokens = full_name.split(' ')
    if len(tokens) == 3 and tokens[2] == 'R.':
        # Special case for David R. Wenger: the first token is the last
        # name, the remaining two tokens form the first name.
        return ' '.join(tokens[1:]), tokens[0]
    # Normal case: the last token is the first name, everything before it
    # is the last name.
    return tokens[-1], ' '.join(tokens[:-1])


# Split every scraped name once, then fan the pairs out into the two lists.
_split_names = [_split_judge_name(entry) for entry in judges_list['Name']]
first_names = [first for first, _last in _split_names]
last_names = [last for _first, last in _split_names]
In [10]:
# Wrap the split name lists in single-column DataFrames so they can be
# concatenated column-wise with judges_list in the next cell.
vorname = pd.DataFrame(first_names)
nachname = pd.DataFrame(last_names)
In [11]:
# Place the last-name and first-name columns next to the scraped data.
# The columns are matched by position, so this assumes judges_list holds
# exactly the two columns 'Name' and 'Partei', in that order.
df = pd.concat([judges_list, nachname, vorname], axis=1)
# Assign a flat list of labels. A nested list ([[...]]) makes current pandas
# build a MultiIndex, which breaks plain column access such as df['Vorname'].
df.columns = ['Name', 'Partei', 'Nachname', 'Vorname']
In [12]:
# Build a combined name column as "<Vorname> <Nachname>".
# NOTE(review): the column label reads 'Nachname Vorname', but the value is
# the first name followed by the last name; confirm which order the
# consumers of richter_partei.csv expect before changing either.
df['Nachname Vorname'] = df['Vorname'] + ' ' + df['Nachname']
In [13]:
# Write the table to richter_partei.csv in the working directory, without
# the row index.
df.to_csv('richter_partei.csv', index=False)
In [14]:
# Display the resulting table as the cell output.
df
Out[14]:
In [15]:
# Count how many rows carry each party label.
_partei_counts = df['Partei'].value_counts()
# Pin the Series and index names. pandas >= 2.0 names the result 'count' and
# the index 'Partei', while older versions name the Series 'Partei' and leave
# the index unnamed. The following cells read the counts from a column called
# 'Partei', so the old layout is enforced explicitly for every version.
_partei_counts.name = 'Partei'
_partei_counts.index.name = None
df_partei_count = _partei_counts.to_frame()
In [16]:
# Move the index (the distinct party labels produced by value_counts) into
# a regular column and replace it with a default integer index.
df_partei_count = df_partei_count.reset_index()
In [17]:
# Sum of the 'Partei' column of the counts table; used by per() as the
# value that corresponds to 100 %.
total = sum(df_partei_count['Partei'])
In [18]:
def per(x, whole=None):
    """Return *x* expressed as a percentage of a whole.

    Args:
        x: The partial value (e.g. the count for one party).
        whole: The value that corresponds to 100 %. When omitted, the
            notebook-level variable ``total`` is used, so existing
            single-argument calls such as ``Series.apply(per)`` keep working.

    Returns:
        ``x / whole * 100``.
    """
    if whole is None:
        # Fall back to the total computed in the preceding cell.
        whole = total
    return x / whole * 100
In [19]:
# Share of each row's 'Partei' value in the overall total, in percent,
# rounded to zero decimals.
df_partei_count['percentage'] = df_partei_count['Partei'].apply(per).round()
In [20]:
# Display the per-party counts and percentages as the cell output.
df_partei_count
Out[20]:
In [ ]: